load("./data/Robjects/02_annot.RData")
load("./data/Robjects/03_DDS.RData")

creating ranks

getRanks <- function(res, annot) {
  # only taking genes which have entrezgene_ids assigned to them
  
  genes_with_entrez <- select(annot, GeneID, entrezgene_id) %>% 
    filter(!is.na(entrezgene_id))
  
  ranks <- as.data.frame(res) %>%
    tibble::rownames_to_column("GeneID") %>%
    merge(genes_with_entrez, by = "GeneID") %>%
    arrange(desc(stat)) %>% 
    select(entrezgene_id, stat) %>% 
    tibble::deframe() # creating a named num from two columns
  return(ranks)
}

ranks.gastroc <- getRanks(res.gastroc, annot)
ranks.soleus <- getRanks(res.soleus, annot)

barplots of ranks

# no idea how to plot this with ggplot (one column) ...

# ggpubr::ggarrange(
  barplot(sort(ranks.gastroc, decreasing = T), main = "Wald Test ranks, gastroc")

  barplot(sort(ranks.soleus, decreasing = T), main = "Wald Test ranks, soleus")

# )

loading pathways

Pathways are provided by http://www.gsea-msigdb.org/gsea/msigdb/mouse/collections.jsp

For now the Canonical pathways are used. These gene sets represent biological a biological process. They are composed from the following databases taking a subset of CP:

database gene sets
BioCarta 252
Reactome 1249
WikiPathways 186
# MH <- qusage::read.gmt("./data/pathways/mh.all.v2022.1.Mm.entrez.gmt") # 50 hallmark genes
# M2 <- qusage::read.gmt("./data/pathways/m2.all.v2022.1.Mm.entrez.gmt") # curated gene set with 2600 genes
CGP <- qusage::read.gmt("./data/pathways/m2.cp.v2022.1.Mm.entrez.gmt") # canonical pathways (1687 gene sets)
getwd()
## [1] "/Users/nick/Documents/github-personal/R/proteofit"

applying fgsea

fgseaRes.gastroc <- fgsea(pathways = CGP, 
                  stats    = ranks.gastroc,
                  minSize  = 15,
                  maxSize  = 500)

fgseaRes.soleus <- fgsea(pathways = CGP, 
                  stats    = ranks.soleus,
                  minSize  = 15,
                  maxSize  = 500)

saveRDS(fgseaRes.gastroc, file="./data/Robjects/04_fgseaRes.gastroc.rds")
saveRDS(fgseaRes.soleus, file="./data/Robjects/04_fgseaRes.soleus.rds")

ordering by padj
“NES – enrichment score normalized to mean enrichment of random samples of the same size”

# ' obtain top pathways ordered by padj and use `ES` for up or down regulation
get_top_pathways <- function(fgseaRes, up = TRUE, pCutoff=0.01, n=10) {
  .updown <- ifelse(up, `>`, `<`)
  
  top.pathways <- fgseaRes %>%
    filter(.updown(ES,0), padj < pCutoff) %>%
    arrange(padj) %>% 
    slice_head(n=n)
  
 return(top.pathways) 
}
top.pathways <- get_top_pathways(fgseaRes = fgseaRes.gastroc)
top.pathways

Enrichment score plot

as example the gene with the

# TODO: plot the top 9 and add labels with description
top1.pathway <- top.pathways[1]$pathway
plotEnrichment(CGP[[top1.pathway]], ranks.gastroc) +
  ggtitle(label=top1.pathway)

# TODO: create plot for top up and down regulated pathways

# ' plots top n enrichment plots for the given fgsea result
plot_top_enrichment <- function(fgseaRes, pathways, ranks, n = 9, up = TRUE) {
  # extracting the top n pathways
  top.pathways <- get_top_pathways(fgseaRes, up=up, pCutoff=params$pCutoff, n=n)
  
  plot.list <- list()
  # lims <- list("x" = c(0,17000), "y" = c(-0.8,0.0))
  
  for (i in 1:nrow(top.pathways)) {
    # filling plot.list with enrichmentPlots 
    # TODO: how can I use facet_wrap for this?
    pathway <- top.pathways[i]$pathway
    plt <- plotEnrichment(pathways[[pathway]], ranks) +
      # TODO: adjust yaxis to the same scale
      # TODO: keep axis.text.x only on the lower row
      # TODO: keep axis.text.y only on the right column
      theme(
        axis.title.x = element_blank(),
        axis.title.y = element_blank(),
      ) # +
      # coord_cartesian(xlim = lims$x, ylim = lims$y)
    plot.list[[i]] <- plt
  }
    
  arrange_plts(plot.list, show.plt_labels = T)
}

# ' helper function to arragen the plot from the enrichment
arrange_plts <- function(plt.list, show.plt_labels=T) {
  nplts <- length(plt.list)
  plt <- plt.list[1]
  xlab <- plt$labels$x
  ylab <- plt$labels$y
  
  # set axis to the same scale
  lims <- list("x" = c(0,17000), "y" = c(-0.8,0.0))
  
  # remove axis
  
  # arrange the plots
  fig_labels <- ""
  if(show.plt_labels){
    fig_labels <- LETTERS[1:nplts]
  }
  figure <- ggpubr::ggarrange(plotlist = plt.list,
                              labels = fig_labels) %>% 
    annotate_figure(
                  left = text_grob(ylab, rot = 90),
                  bottom = text_grob(xlab)
                  )
  
  return(figure)
}



# plot_labels <-
#     data.frame("label" = LETTERS[1:10], "pathway" = top.pathways)
# knitr::kable(caption = "plot labels", plot_labels)

gastroc up

plot_top_enrichment(fgseaRes.gastroc, CGP, ranks.gastroc, up=T)

# TODO: add plot labels to return argument of plot_top_enrichment (use list probably)
plot_labels <-
    data.frame("label" = LETTERS[1:9], "pathway" = get_top_pathways(fgseaRes.gastroc, up=T, n=9)$pathway)
knitr::kable(caption = "plot labels", plot_labels)
plot labels
label pathway
A REACTOME_NEUTROPHIL_DEGRANULATION
B WP_TYROBP_CAUSAL_NETWORK_IN_MICROGLIA
C REACTOME_HEMOSTASIS
D WP_MICROGLIA_PATHOGEN_PHAGOCYTOSIS_PATHWAY
E WP_APOPTOSIS
F REACTOME_IMMUNOREGULATORY_INTERACTIONS_BETWEEN_A_LYMPHOID_AND_A_NON_LYMPHOID_CELL
G WP_CHEMOKINE_SIGNALING_PATHWAY
H BIOCARTA_TNFR2_PATHWAY
I WP_FIBRIN_COMPLEMENT_RECEPTOR_3_SIGNALING_PATHWAY

gastroc down

plot_top_enrichment(fgseaRes.gastroc, CGP, ranks.gastroc, up=F)

plot_labels <-
    data.frame("label" = LETTERS[1:9], "pathway" = get_top_pathways(fgseaRes.gastroc, up=F, n=9)$pathway)
knitr::kable(caption = "plot labels", plot_labels)
plot labels
label pathway
A REACTOME_THE_CITRIC_ACID_TCA_CYCLE_AND_RESPIRATORY_ELECTRON_TRANSPORT
B REACTOME_RESPIRATORY_ELECTRON_TRANSPORT_ATP_SYNTHESIS_BY_CHEMIOSMOTIC_COUPLING_AND_HEAT_PRODUCTION_BY_UNCOUPLING_PROTEINS
C REACTOME_RESPIRATORY_ELECTRON_TRANSPORT
D REACTOME_ANTIGEN_PROCESSING_UBIQUITINATION_PROTEASOME_DEGRADATION
E REACTOME_METABOLISM_OF_AMINO_ACIDS_AND_DERIVATIVES
F REACTOME_TRANSLATION
G WP_ELECTRON_TRANSPORT_CHAIN
H REACTOME_NEDDYLATION
I REACTOME_COMPLEX_I_BIOGENESIS

soleus up

plot_top_enrichment(fgseaRes.soleus, CGP, ranks.soleus, up=T)

plot_labels <-
    data.frame("label" = LETTERS[1:9], "pathway" = get_top_pathways(fgseaRes.soleus, up=T, n=9)$pathway)
knitr::kable(caption = "plot labels", plot_labels)
plot labels
label pathway
A REACTOME_SRP_DEPENDENT_COTRANSLATIONAL_PROTEIN_TARGETING_TO_MEMBRANE
B REACTOME_FORMATION_OF_A_POOL_OF_FREE_40S_SUBUNITS
C WP_CYTOPLASMIC_RIBOSOMAL_PROTEINS
D REACTOME_NONSENSE_MEDIATED_DECAY_NMD_INDEPENDENT_OF_THE_EXON_JUNCTION_COMPLEX_EJC
E WP_TYROBP_CAUSAL_NETWORK_IN_MICROGLIA
F REACTOME_EUKARYOTIC_TRANSLATION_INITIATION
G REACTOME_NONSENSE_MEDIATED_DECAY_NMD
H REACTOME_MAJOR_PATHWAY_OF_RRNA_PROCESSING_IN_THE_NUCLEOLUS_AND_CYTOSOL
I REACTOME_EXTRACELLULAR_MATRIX_ORGANIZATION

soleus down

plot_top_enrichment(fgseaRes.soleus, CGP, ranks.soleus, up=F)

plot_labels <-
    data.frame("label" = LETTERS[1:9], "pathway" = get_top_pathways(fgseaRes.soleus, up=F, n=9)$pathway)
knitr::kable(caption = "plot labels", plot_labels)
plot labels
label pathway
A REACTOME_KEAP1_NFE2L2_PATHWAY
B REACTOME_CELLULAR_RESPONSE_TO_CHEMICAL_STRESS
C REACTOME_GLI3_IS_PROCESSED_TO_GLI3R_BY_THE_PROTEASOME
D REACTOME_DEGRADATION_OF_DVL
E REACTOME_REGULATION_OF_MRNA_STABILITY_BY_PROTEINS_THAT_BIND_AU_RICH_ELEMENTS
F REACTOME_RUNX1_REGULATES_TRANSCRIPTION_OF_GENES_INVOLVED_IN_DIFFERENTIATION_OF_HSCS
G REACTOME_ASYMMETRIC_LOCALIZATION_OF_PCP_PROTEINS
H REACTOME_CELLULAR_RESPONSES_TO_STIMULI
I REACTOME_FCERI_MEDIATED_NF_KB_ACTIVATION

GSEA table plot

gastroc

top significant pathways:

# creating up and down regulated pathway vectors separately to maintain order

topUp <- get_top_pathways(fgseaRes.gastroc, up=T, pCutoff = params$pCutoff, n=10)
topDown <- get_top_pathways(fgseaRes.gastroc, up=F, pCutoff = params$pCutoff, n=10)
topPathways <- bind_rows(topUp, topDown) %>%
  arrange(-NES) %>%
  pull(pathway)

plotGseaTable(
  pathways = CGP[topPathways],
  stats = ranks.gastroc,
  fgseaRes = fgseaRes.gastroc,
  gseaParam = 0.5,
  render = TRUE
) %>%
  ggpubr::as_ggplot() # needed since, for whatever reason only `NULL` gets returned if `plotGseaTable` is rendered inline

soleus

top significant pathways:

# creating up and down regulated pathway vectors separately to maintain order

topUp <- get_top_pathways(fgseaRes.soleus, up=T, pCutoff = params$pCutoff, n=10)
topDown <- get_top_pathways(fgseaRes.soleus, up=F, pCutoff = params$pCutoff, n=10)
topPathways <- bind_rows(topUp, topDown) %>%
  arrange(-NES) %>%
  pull(pathway)

plotGseaTable(
  pathways = CGP[topPathways],
  stats = ranks.soleus,
  fgseaRes = fgseaRes.soleus,
  gseaParam = 0.5,
  render = TRUE
) %>%
  ggpubr::as_ggplot() # needed since, for whatever reason only `NULL` gets returned if `plotGseaTable` is rendered inline

most differential regulated pathways, both tissues

using NES from the fgsea result filtering on the set pCutoff=0.01 yields the following plot:

prepare_result <- function(fgseaRes, colname="NES", pCutoff) {
  fgseaRes.prep <- fgseaRes %>%
    data.frame() %>%
    filter(padj < pCutoff) %>%
    dplyr::rename(!!colname := NES) %>%
    dplyr::select(!!colname, pathway)
  return(fgseaRes.prep)
}

prep.gastroc <- prepare_result(fgseaRes.gastroc, colname="gastroc", params$pCutoff)
prep.soleus  <- prepare_result(fgseaRes.soleus,  colname="soleus", params$pCutoff)


# combining wald-Test data from both tissues and ordering in quadrants
res.combined <- merge(prep.gastroc,
                      prep.soleus,
                      by = "pathway") %>%
  mutate(diff.exp = case_when(
    gastroc < 0 & soleus < 0 ~ "both down",
    gastroc > 0 & soleus > 0 ~ "both up",
    gastroc < 0 & soleus > 0 ~ "ga down, sol up",
    gastroc > 0 & soleus < 0 ~ "ga up, sol down",
    TRUE ~ "different"
  ))

# only annotating top_n pathways by: 
# removing all pathway_names except the top_n_pathways (sum of absolute NES numbers)
# top_n_pathways <- 10
# top_labels <- res.combined %>%
#   group_by(diff.exp) %>% 
#   arrange(desc(abs(gastroc) + abs(soleus))) %>%
#   filter(row_number() %in% c(1:top_n_pathways)) %>% 
#   ungroup() %>% 
#   .$pathway

# res.combined <- res.combined %>% 
#   mutate(pathway = ifelse(pathway %in% top_labels, pathway, ""))

# final plot
p <- ggplot(res.combined, aes(x = gastroc, y = soleus, text=pathway)) +
  geom_vline(xintercept = 0) + 
  geom_hline(yintercept = 0) + 
  geom_point(aes(color = diff.exp)) +
  # scale_color_manual(values = c("red", "chartreuse1", "bisque", "royalblue")) +
  labs(x = "gastroc", y = "soleus") +
  # ggrepel::geom_label_repel(max.overlaps = 20) + 
  ggtitle(label = "NES")

plotly::ggplotly(p, tooltip = "all")

barplot

ggplot(res.combined, aes(x = diff.exp)) +
  geom_bar(aes(fill = diff.exp))